Setup
world_rank | university_name | country | teaching | international | research | citations | income | total_score | num_students | student_staff_ratio | international_students | female_male_ratio | year | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1 | Harvard University | United States of America | 99.7 | 72.4 | 98.7 | 98.8 | 34.5 | 96.1 | 20,152 | 8.9 | 25% | NaN | 2011 |
1 | 2 | California Institute of Technology | United States of America | 97.7 | 54.6 | 98.0 | 99.9 | 83.7 | 96.0 | 2,243 | 6.9 | 27% | 33 : 67 | 2011 |
2 | 3 | Massachusetts Institute of Technology | United States of America | 97.8 | 82.3 | 91.4 | 99.9 | 87.5 | 95.6 | 11,074 | 9.0 | 33% | 37 : 63 | 2011 |
3 | 4 | Stanford University | United States of America | 98.3 | 29.5 | 98.1 | 99.2 | 64.3 | 94.3 | 15,596 | 7.8 | 22% | 42 : 58 | 2011 |
4 | 5 | Princeton University | United States of America | 90.9 | 70.3 | 95.4 | 99.9 | - | 94.2 | 7,929 | 8.4 | 27% | 45 : 55 | 2011 |
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2603 entries, 0 to 2602
Data columns (total 14 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 world_rank 2603 non-null object
1 university_name 2603 non-null object
2 country 2603 non-null object
3 teaching 2603 non-null float64
4 international 2603 non-null object
5 research 2603 non-null float64
6 citations 2603 non-null float64
7 income 2603 non-null object
8 total_score 2603 non-null object
9 num_students 2544 non-null object
10 student_staff_ratio 2544 non-null float64
11 international_students 2536 non-null object
12 female_male_ratio 2370 non-null object
13 year 2603 non-null int64
dtypes: float64(4), int64(1), object(9)
memory usage: 284.8+ KB
Line Plot
Code
# Line plot of citations against world rank for the first 100 rows.

# 1. Prepare the data frame: keep only the first 100 rows of timesData.
df = timesData.iloc[:100]

# 2. Build the trace and collect it in the data list.
trace1 = go.Scatter(
    x=df["world_rank"],  # x coordinates
    y=df["citations"],   # y coordinates
    mode="lines",        # drawing mode for this scatter trace
)
data = [trace1]

# 3. Layout, version 1: a plain dict.
layout = {"title": "Citation and Teaching"}
# 3. Layout, version 2 (alternative):
# layout = go.Layout(title="Citation and Teaching")

# 4. Figure, version 1: a plain dict bundling data and layout.
fig = {"data": data, "layout": layout}
# 4. Figure, version 2 (alternative):
# fig = go.Figure(data=data, layout=layout)

# 5. Render the figure (with a version-2 figure, fig.show() can be used instead).
iplot(fig)
add markers and text
Code
# Line plot with markers and hover text for the first 100 rows.

# 1. Data frame: first 100 rows of timesData.
df = timesData.iloc[:100]

# 2. Trace and data.
marker_style = {"color": "rgba(16,112,2,0.8)"}
trace = go.Scatter(
    x=df["world_rank"],
    y=df["citations"],
    mode="lines+markers",       # draw markers in addition to the line
    marker=marker_style,
    text=df["university_name"],  # text attached to each point
)
data = [trace]

# 3. Layout: figure title plus a titled x axis.
x_axis = {"title": "World Rank", "ticklen": 5}
layout = go.Layout(title="citation", xaxis=x_axis)

# 4. Create the figure.
fig = go.Figure(data=data, layout=layout)

# 5. Plot the figure.
fig.show()
version2(go.Layout, go.Figure, fig.show() 사용)가 더 좋아 보임.
Scatter Plot
Scatter plot과 lineplot 둘 다 go.Scatter 객체 사용. 다른점은 mode 설정
Code
# Scatter plot of citations against world rank for the first 100 rows of 2014.

# 1. Data frame: rows for the year 2014, limited to the first 100.
df2014 = timesData[timesData.year == 2014].iloc[:100, :]

# 2. Trace and data.
trace = go.Scatter(
    x=df2014.world_rank,  # x coordinates
    y=df2014.citations,   # y coordinates
    mode="markers",       # markers only, no connecting line
)
data = [trace]

# 3. Layout (title spelling fixed: "Ciation" -> "Citation").
layout = go.Layout(title="Citation vs world rank")

# 4. Create the figure.
fig = go.Figure(data=data, layout=layout)

# 5. Plot the figure.
fig.show()
add markers and text
Code
# Scatter plot (2014, first 100 rows) with semi-transparent markers and hover text.

# 1. Data frame.
rows_2014 = timesData[timesData["year"] == 2014]
df2014 = rows_2014.iloc[:100]

# 2. Trace and data.
# Transparency, option 1 (alternative): marker = dict(color="green", opacity=0.8)
# Transparency, option 2 (used here): alpha channel of an rgba colour.
marker_style = {"color": "rgba(255,128,2,0.8)"}
trace = go.Scatter(
    x=df2014["world_rank"],
    y=df2014["citations"],
    mode="markers",
    marker=marker_style,
    text=df2014["university_name"],
)
data = [trace]

# 3. Layout: axis titles only.
layout = go.Layout(
    xaxis={"title": "World Rank"},
    yaxis={"title": "Citation"},
)

# 4. Create the figure.
fig = go.Figure(data=data, layout=layout)

# 5. Plot.
fig.show()
Histogram
Code
여러 개의 차트 겹쳐 그리기
- 여기서는 histogram으로 했으나 다른차트들도 가능
Code
# Two overlaid histograms of student_staff_ratio, one per year (2011 and 2012).

# 1. Data: the student_staff_ratio column filtered by year.
x2011 = timesData.loc[timesData["year"] == 2011, "student_staff_ratio"]
x2012 = timesData.loc[timesData["year"] == 2012, "student_staff_ratio"]

# 2. Traces and data.
trace1 = go.Histogram(
    x=x2011,
    # opacity=0.7,  # alternative: set the opacity at trace level
    name="2011",  # name shown in the legend
    marker={"color": "rgb(171,50,96)", "opacity": 0.7},
)
trace2 = go.Histogram(
    x=x2012,
    name="2012",
    marker={"color": "blue", "opacity": 0.7},
)
data = [trace1, trace2]

# 3. Layout.
layout = go.Layout(
    barmode="overlay",  # draw the traces on top of each other
    xaxis={"title": "students-staff ratio"},
    yaxis={"title": "count"},
    title={"text": "histogram", "x": 0.5},
)

# 4. Figure.
fig = go.Figure(data=data, layout=layout)
fig.show()
참고자료 - Opacity와 alpha: opacity는 marker 안팎 모두에서 쓸 수 있고, alpha는 rgba 색상을 쓸 때만 지정하며 둘은 같은 역할(불투명도 조절)을 함. 단, opacity를 marker 밖(trace 수준)에서 지정하면 같은 trace 안에서 겹치는 부분의 밀도는 표현되지 않음. 서로 다른 trace끼리 겹치는 부분의 밀도는 표현됨.
Code
# 2x2 grid of line plots: four metrics, each against world_rank, for 2015.

# 1. Data frame: rows of timesData for the year 2015.
dataframe = timesData[timesData.year == 2015]

# 2. Traces and data: one line trace per metric.
#    The first metric is "research" so the panels match the figure title;
#    the previous "world_rank" entry plotted the rank against itself.
metrics = ["research", "citations", "income", "total_score"]
data = [
    go.Scatter(
        x=dataframe["world_rank"],
        y=dataframe[metric],
        mode="lines",
        name=metric,  # legend label for this panel's trace
    )
    for metric in metrics
]

# 3. Layout: only the title is kept. The hand-written axis domains that used
#    to live here were never passed to the figure, so they had no effect.
layout = dict(
    title="Research, citation, income and total score VS World Rank of Universities"
)

# 4. Figure: 2x2 subplot grid, with the layout actually applied this time.
fig = make_subplots(rows=2, cols=2)
fig.update_layout(layout)

# 5. Plot: fill the grid row by row -> (1,1), (1,2), (2,1), (2,2).
for position, trace in enumerate(data):
    grid_row, grid_col = divmod(position, 2)
    fig.add_trace(trace, row=grid_row + 1, col=grid_col + 1)
fig.show()
Code
from plotly.subplots import make_subplots
import plotly.graph_objects as go
# 2x2 grid in which every cell hosts a different subplot type.
subplot_specs = [
    [{"type": "xy"}, {"type": "polar"}],
    [{"type": "domain"}, {"type": "scene"}],
]
fig = make_subplots(rows=2, cols=2, specs=subplot_specs)

# (trace, row, col) triples, listed in the order they are added to the figure.
placements = [
    (go.Bar(y=[2, 3, 1]), 1, 1),
    (go.Barpolar(theta=[0, 45, 90], r=[2, 3, 1]), 1, 2),
    (go.Pie(values=[2, 3, 1]), 2, 1),
    (go.Scatter3d(x=[2, 3, 1], y=[0, 0, 0], z=[0.5, 1, 2], mode="lines"), 2, 2),
]
for subplot_trace, grid_row, grid_col in placements:
    fig.add_trace(subplot_trace, row=grid_row, col=grid_col)

fig.update_layout(height=700, showlegend=False)
fig.show()
Vector
Vector field(quiver plot)
사전준비
- np.meshgrid : x좌표,y좌표를 가지는 벡터를 입력했을때, 두 벡터로 만들 수 있는 격자의 좌표(x,y)를 출력
Code
(10,) (10,)
(10, 10) (10, 10)
- 격자(grid,matrix)에 함수 적용하면? => matrix(x,y 각각의 좌표)의 모든 요소에 함수가 적용됨
- 배열의 요소 값 차례대로 읽어보기 …
(0,0),(0.2,0),(0.4,0) … (1.8,0) => (0,0.2),(0.2,0.2),(0.4,0.2)…
x좌표 다 읽고 y좌표증가 그 다음 x좌표 다 읽고 y좌표 증가 …
(array([[0. , 0.2, 0.4, 0.6, 0.8, 1. , 1.2, 1.4, 1.6, 1.8],
[0. , 0.2, 0.4, 0.6, 0.8, 1. , 1.2, 1.4, 1.6, 1.8],
[0. , 0.2, 0.4, 0.6, 0.8, 1. , 1.2, 1.4, 1.6, 1.8],
[0. , 0.2, 0.4, 0.6, 0.8, 1. , 1.2, 1.4, 1.6, 1.8],
[0. , 0.2, 0.4, 0.6, 0.8, 1. , 1.2, 1.4, 1.6, 1.8],
[0. , 0.2, 0.4, 0.6, 0.8, 1. , 1.2, 1.4, 1.6, 1.8],
[0. , 0.2, 0.4, 0.6, 0.8, 1. , 1.2, 1.4, 1.6, 1.8],
[0. , 0.2, 0.4, 0.6, 0.8, 1. , 1.2, 1.4, 1.6, 1.8],
[0. , 0.2, 0.4, 0.6, 0.8, 1. , 1.2, 1.4, 1.6, 1.8],
[0. , 0.2, 0.4, 0.6, 0.8, 1. , 1.2, 1.4, 1.6, 1.8]]),
array([[0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. ],
[0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2],
[0.4, 0.4, 0.4, 0.4, 0.4, 0.4, 0.4, 0.4, 0.4, 0.4],
[0.6, 0.6, 0.6, 0.6, 0.6, 0.6, 0.6, 0.6, 0.6, 0.6],
[0.8, 0.8, 0.8, 0.8, 0.8, 0.8, 0.8, 0.8, 0.8, 0.8],
[1. , 1. , 1. , 1. , 1. , 1. , 1. , 1. , 1. , 1. ],
[1.2, 1.2, 1.2, 1.2, 1.2, 1.2, 1.2, 1.2, 1.2, 1.2],
[1.4, 1.4, 1.4, 1.4, 1.4, 1.4, 1.4, 1.4, 1.4, 1.4],
[1.6, 1.6, 1.6, 1.6, 1.6, 1.6, 1.6, 1.6, 1.6, 1.6],
[1.8, 1.8, 1.8, 1.8, 1.8, 1.8, 1.8, 1.8, 1.8, 1.8]]))
Gradient Vector Field
\(f(x, y) = xe^{-x^2-y^2}\)의 gradient: \(\nabla f = \left((1-2x^2)e^{-x^2-y^2},\; -2xy\,e^{-x^2-y^2}\right)\)
Code
(array([[-1. , -0.97979798, -0.95959596, ..., 0.95959596,
0.97979798, 1. ],
[-1. , -0.97979798, -0.95959596, ..., 0.95959596,
0.97979798, 1. ],
[-1. , -0.97979798, -0.95959596, ..., 0.95959596,
0.97979798, 1. ],
...,
[-1. , -0.97979798, -0.95959596, ..., 0.95959596,
0.97979798, 1. ],
[-1. , -0.97979798, -0.95959596, ..., 0.95959596,
0.97979798, 1. ],
[-1. , -0.97979798, -0.95959596, ..., 0.95959596,
0.97979798, 1. ]]),
array([[-1. , -1. , -1. , ..., -1. ,
-1. , -1. ],
[-0.97979798, -0.97979798, -0.97979798, ..., -0.97979798,
-0.97979798, -0.97979798],
[-0.95959596, -0.95959596, -0.95959596, ..., -0.95959596,
-0.95959596, -0.95959596],
...,
[ 0.95959596, 0.95959596, 0.95959596, ..., 0.95959596,
0.95959596, 0.95959596],
[ 0.97979798, 0.97979798, 0.97979798, ..., 0.97979798,
0.97979798, 0.97979798],
[ 1. , 1. , 1. , ..., 1. ,
1. , 1. ]]))
1. 시점
- 종점은 화살표로 표시해야 하므로 시점만 만들기
Code
# 1. Prepare data.
# Coordinates are grouped in twos: each even index is a start point and the
# following odd index is the matching end point.
#   first vector : start x[0], y[0], z[0] -> end x[1], y[1], z[1]
#   second vector: start x[2], y[2], z[2] -> end x[3], y[3], z[3]
x = [10.1219, 10.42579, 15.21396, 15.42468, 20.29639, 20.46268, 25.36298, 25.49156]
y = [5.0545, 5.180104, 5.0545, 5.20337, 5.0545, 5.194271, 5.0545, 5.231627]
z = [5.2713, 5.231409, 5.2713, 5.231409, 5.2713, 5.235852, 5.2713, 5.231627]
# pairs = [(0, 1), (2, 3), (4, 5), (6, 7)]

# Indices of the start points (every second position).
list(range(0, len(x), 2))
[0, 2, 4, 6]
Code
# 2. Trace and data: plot only the start point of every vector, i.e. the
#    entries at the even indices of x, y and z.
trace1 = go.Scatter3d(
    x=x[::2],
    y=y[::2],
    z=z[::2],
    mode="markers",
    # The colour is set under `marker` because only markers are drawn here;
    # the previous `line=dict(color="red")` styled connecting lines, which
    # "markers" mode does not draw.
    marker=dict(color="red"),
)
data = [trace1]

# 3. Layout.
layout = go.Layout(title=dict(text="vectors"))

# 4. Figure.
fig = go.Figure(data=data, layout=layout)
fig.show()
2. 선 만들기
Code
# 1. Prepare data.
def _with_segment_breaks(coords):
    """Return a copy of coords with None inserted after every odd-indexed entry.

    In "lines" mode a scatter trace joins consecutive points. Each vector is
    stored as (start, end) at positions 2k and 2k+1, so placing None right
    after every end point keeps the end of one vector from being joined to
    the start of the next.
    """
    broken = []
    for index, value in enumerate(coords):
        broken.append(value)
        if index % 2 == 1:
            broken.append(None)
    return broken


x_lines = _with_segment_breaks(x)
y_lines = _with_segment_breaks(y)
z_lines = _with_segment_breaks(z)

# 2. Trace and trace set (= data).
trace2 = go.Scatter3d(
    x=x_lines,
    y=y_lines,
    z=z_lines,
    mode="lines",
    line=dict(width=2, color='rgb(255, 0,0)'),
)
data = [trace2]

# 3. Layout.
layout = go.Layout(title="lines")

# 4. Figure.
fig = go.Figure(data=data, layout=layout)

# 5. Plotting.
fig.show()
3.종점 만들기
Code
Code
import plotly.graph_objs as go
# plotly.offline.init_notebook_mode()
# Coordinates of four vectors; each (start, end) index pair is listed in `pairs`.
x = [10.1219, 10.42579, 15.21396, 15.42468, 20.29639, 20.46268, 25.36298, 25.49156]
y = [5.0545, 5.180104, 5.0545, 5.20337, 5.0545, 5.194271, 5.0545, 5.231627]
z = [5.2713, 5.231409, 5.2713, 5.231409, 5.2713, 5.235852, 5.2713, 5.231627]
pairs = [(0, 1), (2, 3), (4, 5), (6, 7)]

# Plot ONLY the first point (the start) of each pair.
start_indices = [start for start, _end in pairs]
trace1 = go.Scatter3d(
    x=[x[i] for i in start_indices],
    y=[y[i] for i in start_indices],
    z=[z[i] for i in start_indices],
    mode='markers',
    name='markers',
    # The colour is set under `marker` because only markers are drawn here;
    # the previous `line=dict(color='red')` styled connecting lines, which
    # 'markers' mode does not draw.
    marker=dict(color='red'),
)
go.Figure(data=trace1)